Chronological Clustering Report

The following code digs into the chronological clustering of both the community size spectrum slopes, and the mean sizes across all years of the NMFS groundfish survey.

Data is prepared and updated using {targets} to ensure a consistent data state and a reproducible workflow.

Target Data

Data for the report comes directly from the {targets} workflow found in _targets.R.

####  Data  ####

####  Size Spectrum Targets 

# SS and manual bins together
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(size_spectrum_indices))   
size_spectrum_indices <- size_spectrum_indices  %>% 
  mutate(season = fct_rev(season),
         survey_area = factor(survey_area, 
                              levels = c("GoM", "GB", "SNE", "MAB")),
         yr = as.numeric(as.character(Year)))


####  OISST Data

# OISST for all the regions
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(regional_oisst))          


####  Size Data Targets

# 1. Biological data used as input
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(nefsc_1g))            

# 2. Mean sizes grouped on - yr, season, survey area, taxon group, fishery status
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_individual_sizes))   

# 3. Mean sizes using same groupings as the size spectrum indices
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_sizes_ss_groups))

# quick little format
nefsc_1g <- nefsc_1g %>% 
  mutate(fishery = case_when(
    fishery == "com" ~ "Commercially Targeted",
    fishery == "nc" ~ "Not Commercially Targeted",
    TRUE ~ "Not Labelled"
  ))

Exploration of Abundance/Size Information

# Cut up discrete length and weight bins
nefsc_size_bins <- nefsc_1g %>% 
  mutate(
    length_bin = case_when(
      LngtClass <= 5   ~ "0 - 5cm",
      LngtClass <= 10  ~ "5 - 10cm",
      LngtClass <= 25  ~ "10 - 25cm",
      LngtClass <= 50  ~ "25 - 50cm",
      LngtClass <= 100 ~ "50 - 100cm",
      LngtClass >= 100 ~ ">= 100cm"),
    length_bin = factor(length_bin, levels = c(
      "0 - 5cm", "5 - 10cm", "10 - 25cm",
      "25 - 50cm", "50 - 100cm", ">= 100cm")))


# Weight bins
nefsc_size_bins <- nefsc_size_bins %>% 
  mutate(
    weight_bin = case_when(
      ind_weight_kg <= 0.001 ~ "0 - 1g",
      ind_weight_kg <= 0.005 ~ "1 - 5g",
      ind_weight_kg <= 0.010 ~ "5 - 10g",
      ind_weight_kg <= 0.050 ~ "10 - 50g",
      ind_weight_kg <= 0.100 ~ "50 - 100g",
      ind_weight_kg <= 0.500 ~ "100 - 500g",
      ind_weight_kg <= 1.000 ~ ".5 - 1kg",
      ind_weight_kg <= 5.000 ~ "1 - 5kg",
      ind_weight_kg <= 10.00 ~ "5 - 10kg",
      ind_weight_kg >= 10.00 ~ ">= 10kg" ),
    weight_bin = factor(weight_bin, levels = c(
      "0 - 1g", "1 - 5g", "5 - 10g", "10 - 50g",
      "50 - 100g", "100 - 500g", ".5 - 1kg",
      "1 - 5kg", "5 - 10kg", ">= 10kg")))


# Rename the functional groups
nefsc_size_bins <- nefsc_size_bins %>% 
  mutate(
    spec_class = case_when(
      spec_class == "gf"  ~ "Groundfish",
      spec_class == "pel" ~ "Pelagic",
      spec_class == "dem" ~ "Demersal",
      spec_class == "el"  ~ "Elasmobranch",
      spec_class == "<NA>" ~ "NA"
    ))

# Make regions go N->S
nefsc_size_bins <- nefsc_size_bins %>% 
  mutate(
    survey_area = factor(survey_area, levels = c("GoM", "GB", "SNE", "MAB")
    ))

Regional Differences

# Do some grouping
regional_totals <- nefsc_size_bins %>% 
  group_by(Year, survey_area) %>% 
  summarise(total_survey_catch = sum(Number),
            total_lw_bio = sum(sum_weight_kg),
            total_strat_abund = sum(expanded_abund_s),
            total_strat_lw_bio = sum(sum_weight_kg)) %>% 
  ungroup()

Length Bins

# Get fraction for different length/weight classes

# length bins
regional_lengths <- nefsc_size_bins %>% 
  group_by(Year, survey_area, length_bin) %>% 
  summarise(lenbin_survey_catch = sum(Number),
            lenbin_lw_bio       = sum(sum_weight_kg),
            lenbin_strat_abund  = sum(expanded_abund_s),
            lenbin_strat_lw_bio = sum(stratified_sum_bodymass)) %>% 
  left_join(regional_totals) %>% 
  mutate(
    perc_total_catch  = (lenbin_survey_catch - total_survey_catch) * 100,
    perc_lw_bio       = (lenbin_lw_bio - total_lw_bio) * 100,
    perc_strat_abund  = (lenbin_strat_abund - total_strat_abund) * 100,
    perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(regional_lengths, 
       aes(Year, lenbin_strat_abund, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~survey_area) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance", 
       x = "", 
       fill = "Length Bin") 

# Expected Biomass for Entire Survey Area
ggplot(regional_lengths, 
       aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~survey_area) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Length Bin")+ theme(legend.position = "none")

Weight Bins

# weight bins
regional_weights <- nefsc_size_bins %>% 
  group_by(Year, survey_area, weight_bin) %>% 
  summarise(wtbin_survey_catch = sum(Number),
            wtbin_lw_bio       = sum(sum_weight_kg),
            wtbin_strat_abund  = sum(expanded_abund_s),
            wtbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(regional_totals) %>% 
  mutate(
    perc_total_catch  = (wtbin_survey_catch / total_survey_catch) * 100,
    perc_lw_bio       = (wtbin_lw_bio / total_lw_bio) * 100,
    perc_strat_abund  = (wtbin_strat_abund / total_strat_abund) * 100,
    perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(regional_weights, 
       aes(Year, wtbin_strat_abund, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~survey_area) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance" ,
       x = "",
       fill = "Weight Bin")

# Expected Biomass for Entire Survey Area
ggplot(regional_weights, 
       aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~survey_area) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area", 
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Weight Bin") + theme(legend.position = "none")

Seasonal Differences

# Do some grouping
seasonal_totals <- nefsc_size_bins %>% 
  group_by(Year, season) %>% 
  summarise(total_survey_catch = sum(Number),
            total_lw_bio = sum(sum_weight_kg),
            total_strat_abund = sum(expanded_abund_s),
            total_strat_lw_bio = sum(sum_weight_kg)) %>% 
  ungroup()

Length Bins

# Get fraction for different length/weight classes

# length bins
seasonal_lengths <- nefsc_size_bins %>% 
  group_by(Year, season, length_bin) %>% 
  summarise(lenbin_survey_catch = sum(Number),
            lenbin_lw_bio       = sum(sum_weight_kg),
            lenbin_strat_abund  = sum(expanded_abund_s),
            lenbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(seasonal_totals) %>% 
  mutate(
    perc_total_catch  = (lenbin_survey_catch - total_survey_catch) * 100,
    perc_lw_bio       = (lenbin_lw_bio - total_lw_bio) * 100,
    perc_strat_abund  = (lenbin_strat_abund - total_strat_abund) * 100,
    perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(seasonal_lengths, 
       aes(Year, lenbin_strat_abund, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~season) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance" ,
       x = "", 
       fill = "Length Bin")

# Expected Biomass for Entire Survey Area
ggplot(seasonal_lengths, 
       aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~season) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area", 
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Length Bin") + theme(legend.position = "none")

Weight Bins

# weight bins
seasonal_weights <- nefsc_size_bins %>% 
  group_by(Year, season, weight_bin) %>% 
  summarise(wtbin_survey_catch = sum(Number),
            wtbin_lw_bio       = sum(sum_weight_kg),
            wtbin_strat_abund  = sum(expanded_abund_s),
            wtbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(seasonal_totals) %>% 
  mutate(
    perc_total_catch  = (wtbin_survey_catch / total_survey_catch) * 100,
    perc_lw_bio       = (wtbin_lw_bio / total_lw_bio) * 100,
    perc_strat_abund  = (wtbin_strat_abund / total_strat_abund) * 100,
    perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(seasonal_weights, 
       aes(Year, wtbin_strat_abund, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~season) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance" ,
       x = "", 
       fill = "Weight Bin")

# Expected Biomass for Entire Survey Area
ggplot(seasonal_weights, 
       aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~season) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Weight Bin") + theme(legend.position = "none")

Functional Group Diffferences

# Do some grouping
fgroup_totals <- nefsc_size_bins %>% 
  group_by(Year, spec_class) %>% 
  summarise(total_survey_catch = sum(Number),
            total_lw_bio = sum(sum_weight_kg),
            total_strat_abund = sum(expanded_abund_s),
            total_strat_lw_bio = sum(sum_weight_kg)) %>% 
  ungroup()

Length Bins

# Get fraction for different length/weight classes

# length bins
fgroup_lengths <- nefsc_size_bins %>% 
  group_by(Year, spec_class, length_bin) %>% 
  summarise(lenbin_survey_catch = sum(Number),
            lenbin_lw_bio       = sum(sum_weight_kg),
            lenbin_strat_abund  = sum(expanded_abund_s),
            lenbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(fgroup_totals) %>% 
  mutate(
    perc_total_catch  = (lenbin_survey_catch - total_survey_catch) * 100,
    perc_lw_bio       = (lenbin_lw_bio - total_lw_bio) * 100,
    perc_strat_abund  = (lenbin_strat_abund - total_strat_abund) * 100,
    perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fgroup_lengths, 
       aes(Year, lenbin_strat_abund, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~spec_class) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance" ,
       x = "", 
       fill = "Length Bin")

# Expected Biomass for Entire Survey Area
ggplot(fgroup_lengths, 
       aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~spec_class) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Length Bin")

Weight Bins

# weight bins
fgroup_weigths <- nefsc_size_bins %>% 
  group_by(Year, spec_class, weight_bin) %>% 
  summarise(wtbin_survey_catch = sum(Number),
            wtbin_lw_bio       = sum(sum_weight_kg),
            wtbin_strat_abund  = sum(expanded_abund_s),
            wtbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(fgroup_totals) %>% 
  mutate(
    perc_total_catch  = (wtbin_survey_catch / total_survey_catch) * 100,
    perc_lw_bio       = (wtbin_lw_bio / total_lw_bio) * 100,
    perc_strat_abund  = (wtbin_strat_abund / total_strat_abund) * 100,
    perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fgroup_weigths, 
       aes(Year, wtbin_strat_abund, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~spec_class) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance", 
       x = "", 
       fill = "Weight Bin")

# Expected Biomass for Entire Survey Area
ggplot(fgroup_weigths, 
       aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~spec_class) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Weight Bin")

Functional Group Key

Thought it may be helpful to have a table here to check which species are currently assigned to which group.

# Display table of whichever specis fall into the categories
nefsc_size_bins %>% 
  distinct(SpecCode, scientific_name, spec_class) %>% 
  rename(`Common Name` = SpecCode, 
         `Scientific Name` = scientific_name,
         `Functional Group` = spec_class) %>% 
  arrange(`Functional Group`, `Common Name`) %>% 
  DT::datatable(filter = "top", rownames = FALSE)

Fishery Status Diffferences

# Do some grouping
fish_totals <- nefsc_size_bins %>% 
  group_by(Year, fishery) %>% 
  summarise(total_survey_catch = sum(Number),
            total_lw_bio = sum(sum_weight_kg),
            total_strat_abund = sum(expanded_abund_s),
            total_strat_lw_bio = sum(sum_weight_kg)) %>% 
  ungroup()

Length Bins

# Get fraction for different length/weight classes

# length bins
fish_lengths <- nefsc_size_bins %>% 
  group_by(Year, fishery, length_bin) %>% 
  summarise(lenbin_survey_catch = sum(Number),
            lenbin_lw_bio       = sum(sum_weight_kg),
            lenbin_strat_abund  = sum(expanded_abund_s),
            lenbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(fish_totals) %>% 
  mutate(
    perc_total_catch  = (lenbin_survey_catch - total_survey_catch) * 100,
    perc_lw_bio       = (lenbin_lw_bio - total_lw_bio) * 100,
    perc_strat_abund  = (lenbin_strat_abund - total_strat_abund) * 100,
    perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fish_lengths, 
       aes(Year, lenbin_strat_abund, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~fishery) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance" ,
       x = "", 
       fill = "Length Bin")

# # Biomass from Survey using L-W

# Expected Biomass for Entire Survey Area
ggplot(fish_lengths, 
       aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~fishery) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Length Bin")

Weight Bins

# weight bins
fish_weigths <- nefsc_size_bins %>% 
  group_by(Year, fishery, weight_bin) %>% 
  summarise(wtbin_survey_catch = sum(Number),
            wtbin_lw_bio       = sum(sum_weight_kg),
            wtbin_strat_abund  = sum(expanded_abund_s),
            wtbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(fish_totals) %>% 
  mutate(
    perc_total_catch  = (wtbin_survey_catch / total_survey_catch) * 100,
    perc_lw_bio       = (wtbin_lw_bio / total_lw_bio) * 100,
    perc_strat_abund  = (wtbin_strat_abund / total_strat_abund) * 100,
    perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fish_weigths, 
       aes(Year, wtbin_strat_abund, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~fishery) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Predicted Abundance - Full Survey Area", 
       title = "Abundance",
       subtitle = "Expected Area-Stratified Abundance", 
       x = "", 
       fill = "Weight Bin")

# Expected Biomass for Entire Survey Area
ggplot(fish_weigths, 
       aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_wrap(~fishery) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Weight Bin")

Fishery Group Key

Thought it may be helpful to have a table here to check which species are currently assigned to which group.

# Display table of whichever specis fall into the categories
nefsc_size_bins %>% 
  distinct(SpecCode, scientific_name, fishery) %>% 
  rename(`Common Name` = SpecCode, 
         `Scientific Name` = scientific_name,
         `Fishery Status` = fishery) %>% 
  arrange(`Fishery Status`, `Common Name`) %>% 
  DT::datatable(filter = "top", rownames = FALSE)

Community Size Spectra Results

Community size spectrum slopes were estimated using 2 methods for comparison, using a minimum individual biomass of 1 gram, and using area stratified abundances.

The first was using the methods of the {sizeSpectra} package which were shown to be the most accurate when using simulated data.

The second method uses binned abundances, with bins of width 0.5 on a log10 scale, so 10^0 - 10^0.5 etc. These bins were then normalized by dividing the abundances by the bin width to account for the increasing bin size.

Results from Both Methods

# Pull the group ID for the slopes grouped on year, season, and region
warmem_group_slopes <- size_spectrum_indices %>% 
  filter(`group ID`== "single years * season * region")


# Or just regions and years
year_region_slopes <- size_spectrum_indices %>% 
  filter(`group ID` == "single years * region")

Biomass Data to Match Size Spectrum Analysis

Region Only

Season makes it kind of tricky to understand what is going on, so as a starting point I will show the results of just single years and the data from both seasons

# Do some grouping
year_region_totals <- nefsc_size_bins %>% 
  group_by(Year, survey_area) %>% 
  summarise(total_survey_catch = sum(Number),
            total_lw_bio = sum(sum_weight_kg),
            total_strat_abund = sum(expanded_abund_s),
            total_strat_lw_bio = sum(sum_weight_kg),
            .groups = "drop") 


# weight bins
year_region_weights <- nefsc_size_bins %>% 
  group_by(Year, survey_area, weight_bin) %>% 
  summarise(wtbin_survey_catch = sum(Number),
            wtbin_lw_bio       = sum(sum_weight_kg),
            wtbin_strat_abund  = sum(expanded_abund_s),
            wtbin_strat_lw_bio = sum(sum_weight_kg),
            .groups = "drop") %>% 
  left_join(year_region_totals) %>% 
  mutate(
    perc_total_catch  = (wtbin_survey_catch / total_survey_catch) * 100,
    perc_lw_bio       = (wtbin_lw_bio / total_lw_bio) * 100,
    perc_strat_abund  = (wtbin_strat_abund / total_strat_abund) * 100,
    perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)


# Expected Biomass for Entire Survey Area
ggplot(year_region_weights, 
       aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_grid(survey_area~.) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Length Bin")

Region*Season

The first lens to look at is the seasonal variation across all the different areas. This is the typical grouping that we have been focusing on.

# Do some grouping
warmem_totals <- nefsc_size_bins %>% 
  group_by(Year, survey_area, season) %>% 
  summarise(total_survey_catch = sum(Number),
            total_lw_bio = sum(sum_weight_kg),
            total_strat_abund = sum(expanded_abund_s),
            total_strat_lw_bio = sum(sum_weight_kg)) %>% 
  ungroup()


# weight bins
warmem_weights <- nefsc_size_bins %>% 
  group_by(Year, survey_area, season, weight_bin) %>% 
  summarise(wtbin_survey_catch = sum(Number),
            wtbin_lw_bio       = sum(sum_weight_kg),
            wtbin_strat_abund  = sum(expanded_abund_s),
            wtbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(warmem_totals) %>% 
  mutate(
    perc_total_catch  = (wtbin_survey_catch / total_survey_catch) * 100,
    perc_lw_bio       = (wtbin_lw_bio / total_lw_bio) * 100,
    perc_strat_abund  = (wtbin_strat_abund / total_strat_abund) * 100,
    perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)


# Expected Biomass for Entire Survey Area
ggplot(warmem_weights, 
       aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_grid(survey_area~season) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Length Bin")

Region*Functional Group

The second lens that I feel is important is the functional groups. This is the not the typical grouping that we have been focusing on, but clears the air on where the biomass increase is coming from.

# Do some grouping
fgroup_totals <- nefsc_size_bins %>% 
  group_by(Year, survey_area, spec_class) %>% 
  summarise(total_survey_catch = sum(Number),
            total_lw_bio = sum(sum_weight_kg),
            total_strat_abund = sum(expanded_abund_s),
            total_strat_lw_bio = sum(sum_weight_kg)) %>% 
  ungroup()


# weight bins
fgroup_weights <- nefsc_size_bins %>% 
  group_by(Year, survey_area, spec_class, weight_bin) %>% 
  summarise(wtbin_survey_catch = sum(Number),
            wtbin_lw_bio       = sum(sum_weight_kg),
            wtbin_strat_abund  = sum(expanded_abund_s),
            wtbin_strat_lw_bio = sum(sum_weight_kg)) %>% 
  left_join(fgroup_totals) %>% 
  mutate(
    perc_total_catch  = (wtbin_survey_catch / total_survey_catch) * 100,
    perc_lw_bio       = (wtbin_lw_bio / total_lw_bio) * 100,
    perc_strat_abund  = (wtbin_strat_abund / total_strat_abund) * 100,
    perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)


# Expected Biomass for Entire Survey Area
ggplot(fgroup_weights, 
       aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
  geom_col(position = "stack") +
  facet_grid(spec_class~survey_area) +
  scale_fill_gmri() +
  scale_y_continuous(labels = comma_format()) +
  labs(y = "Total Expected Biomass (kg) - Full Survey Area",  
       title = "Biomass",
       subtitle = "Expected Area-Stratified Biomass", 
       x = "", 
       fill = "Length Bin")

Edwards Methodology

The Edwards methodology differs from the other methods for estimating size spectra by avoiding the subjective decisions around how to bin data prior to fitting the log-linear regression.

Instead, Edwards uses the individual size distributions (how many individuals of any given length/weight). Abundances are totaled into discrete size bins based on expected biomass at length and length + 1, and individuals that fall within those bins are totaled to get abundances across a continuous distribution of individual bodymass.

The next difference is that the individual body size data is presumed to follow a bounded power-law distribution, with a minimum and maximum body size. Using the individual size data, maximum likelihood estimation is used to solve for the parameter (b) that minimizes the negative log-likelihood.

Year and Region Only

Long-Term Patterns

# Just years and regions
(ss_patterns <- year_region_slopes %>% 
  ggplot(aes(yr, b, color = survey_area)) +
  geom_line(aes(group = survey_area)) +
  geom_point(alpha = 0.6) +
  facet_grid(survey_area~.) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Size Spectrum Slope (b)", 
       color = "",
       title = "Results from Area-Stratified Abundance - Edwards Method") 
)

Chronological Clustering

Prep and Clustering Code

# Reshaping
# Goal: Row for Years, columns for each different group
cluster_dat <- year_region_slopes %>% 
  select(Year, survey_area, b) 

# Pivot wider
ss_dat <- cluster_dat %>% 
  pivot_wider(names_from = c(survey_area), 
              values_from = b, 
              names_sep = "_") %>% 
  column_to_rownames(var = "Year")
# Function to run chronological clustering
run_chron_clust <- function(in_dat){
  
  # Get Euclidean Distances
  eucdist <- vegdist(in_dat,
                     method = "euclidean",
                     binary = FALSE,
                     diag = FALSE,
                     upper = FALSE,
                     na.rm = TRUE)
  
  # Perform Chronological Clustering on distances
  cl <- chclust(eucdist, method = "coniss")
  
  # Return list
  return(list(eucdist = eucdist, cl = cl))
}


# Run for sizeSpectra Results
ss_chron <- run_chron_clust(in_dat = ss_dat)

Broomstick Plot

# broken stick model
vegan::bstick(ss_chron$cl, plot=T)

Dendrogram

plot(ss_chron$cl, 
     hang = -0.1, 
     axes = FALSE, 
     cex = 0.8, 
     horiz = F)
axis(side = 2, cex.axis = 1)
title("sizeSpectra Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)

Slopes with Breakpoints

ss_patterns +
  geom_vline(aes(xintercept = 1984.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2008.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2003.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 1975.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 1973.5), color = "gray40", linetype = 2, size = 0.7) 

Year, Region, and Season

Long-Term Patterns

# Plot the sizeSpectra slopes - year*region*season
(ss_patterns <- warmem_group_slopes %>% 
  ggplot(aes(yr, b, color = survey_area)) +
  geom_line(aes(group = survey_area)) +
  geom_point(alpha = 0.6) +
  facet_grid(survey_area~season) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Size Spectrum Slope (b)", 
       color = "",
       title = "Results from Area-Stratified Abundance - Edwards Method") 
)

Chronological Clustering

Prep and Clustering Code

# Reshaping
# Goal: Row for Years, columns for each different group
cluster_dat <- warmem_group_slopes %>% 
  select(Year, season, survey_area, b) 

# Pivot wider
ss_dat <- cluster_dat %>% 
  pivot_wider(names_from = c(season, survey_area), 
              values_from = b, 
              names_sep = "_") %>% 
  column_to_rownames(var = "Year")
# Function to run chronological clustering
run_chron_clust <- function(in_dat){
  
  # Get Euclidean Distances
  eucdist <- vegdist(in_dat,
                     method = "euclidean",
                     binary = FALSE,
                     diag = FALSE,
                     upper = FALSE,
                     na.rm = TRUE)
  
  # Perform Chronological Clustering on distances
  cl <- chclust(eucdist, method = "coniss")
  
  # Return list
  return(list(eucdist = eucdist, cl = cl))
}


# Run for sizeSpectra Results
ss_chron <- run_chron_clust(in_dat = ss_dat)

Broomstick Plot

# broken stick model
vegan::bstick(ss_chron$cl, plot=T)

Dendrogram

plot(ss_chron$cl, 
     hang = -0.1, 
     axes = FALSE, 
     cex = 0.8, 
     horiz = F)
axis(side = 2, cex.axis = 1)
title("sizeSpectra Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)

Slopes with Breakpoints

ss_patterns +
  geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2008.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2004.5), color = "gray40", linetype = 2, size = 0.7) 

Log10 Binning

Slope

Long-Term Patterns

# plot trends of log10 slopes
(l10_patterns <- warmem_group_slopes %>% 
  ggplot(aes(yr, l10_slope_strat, color = survey_area)) +
  geom_line(aes(group = survey_area)) +
  geom_point(alpha = 0.6) +
  facet_grid(survey_area~season) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Size Spectrum Slope (b)", 
       color = "",
       title = "Results from Area-Stratified Abundance - log10 bins")
)

Chronological Clustering

Prep and Clustering Code

# Reshaping
# Goal: Row for Years, columns for each different group
l10_cluster_dat <- warmem_group_slopes %>% 
  select(Year, season, survey_area, l10_slope_strat) 

# Pivot wider
l10_dat <- l10_cluster_dat %>% 
  pivot_wider(names_from = c(season, survey_area), 
              values_from = l10_slope_strat, 
              names_sep = "_") %>% 
  column_to_rownames(var = "Year")

# Run clustering
l10_clust <- run_chron_clust(in_dat = l10_dat)

Broomstick Plot

# broken stick model
vegan::bstick(l10_clust$cl, plot=T)

Dendrogram

plot(l10_clust$cl, 
     hang = -0.1, 
     axes = FALSE, 
     cex = 0.8, 
     horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Size Spectrum Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)

Slopes with Breakpoints

l10_patterns +
  geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 1996.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 1995.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2001.5), color = "gray40", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2002.5), color = "gray40", linetype = 2, size = 0.7)

Intercept

Long-Term Patterns

# plot trends of log10 slopes
(int_patterns <- warmem_group_slopes %>% 
  ggplot(aes(yr, l10_int_strat, color = survey_area)) +
  geom_line(aes(group = survey_area)) +
  geom_point(alpha = 0.6) +
  facet_grid(survey_area~season) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Size Spectrum Intercept", 
       color = "",
       title = "Results from Area-Stratified Abundance - log10 bins")
)

Chronological Clustering

Prep and Clustering Code

# Reshaping
# Goal: Row for Years, columns for each different group
l10_intercept_dat <- warmem_group_slopes %>% 
  select(Year, season, survey_area, l10_int_strat) 

# Pivot wider
intercept_dat <- l10_intercept_dat %>% 
  pivot_wider(names_from = c(season, survey_area), 
              values_from = l10_int_strat, 
              names_sep = "_") %>% 
  column_to_rownames(var = "Year")

# Run Clustering
l10_ints <- run_chron_clust(in_dat = intercept_dat)

Broomstick Plot

# broken stick model
vegan::bstick(l10_ints$cl, plot=T)

Dendrogram

plot(l10_ints$cl, 
     hang = -0.1, 
     axes = FALSE, 
     cex = 0.8, 
     horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Bins Intercept - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)

Intercepts with Breakpoints

# Re-plot patterns with breaks
int_patterns +
  geom_vline(aes(xintercept = 1987.5), color = "gray50", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 1996.5), color = "gray50", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 1995.5), color = "gray50", linetype = 2, size = 0.7) +
  labs(subtitle = "Chronological Clustering Breakpoints")

Fit Statidtics (adjusted r-squared)

For each of the slope/intercepts derived using a linear model and binned data I also pulled the adjusted r-square to get a sense of whether or not certain groups had poor fits that should be investigated.

# Reshaping
# Goal: Row for Years, columns for each different group
l10_fit_dat <- warmem_group_slopes %>% 
  select(Year, season, survey_area, l10_slope_strat, l10_int_strat, l10_rsqr_strat) %>% 
  mutate(yr = as.numeric(Year))


l10_fit_dat %>% 
  ggplot(aes(yr, l10_rsqr_strat, color = survey_area)) +
  geom_rect(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 0.2, 
            fill = "gray80", color = "transparent") +
  geom_hline(yintercept = 0.2, linetype = 2, size = 0.5, color = "gray50") +
  geom_line(aes(group = survey_area)) +
  geom_point(alpha = 0.6) +
  facet_grid(survey_area~season) +
  scale_color_gmri() +
  scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
  labs(x = "", 
       y = "Linear Model R-Square", 
       color = "",
       title = "Results from Area-Stratified Abundance - log10 bins")

Sea Surface Temperature

Long-Term Patterns

# plot trends of log10 slopes
(sst_patterns <- regional_oisst %>% 
  ggplot(aes(yr, sst_anom, color = survey_area)) +
  geom_line(aes(group = survey_area)) +
  geom_point(alpha = 0.6) +
  facet_grid(survey_area~.) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Mean Temperature Anomaly", 
       color = "",
       title = "")
)

Chronological Clustering

Prep and Clustering Code

# Pivot OISST Wider
oisst_dat <- regional_oisst %>% 
  select(yr, survey_area, sst_anom) %>% 
  pivot_wider(names_from = survey_area, values_from = sst_anom) %>% 
  column_to_rownames(var = "yr")

# Run clustering
sst_clust <- run_chron_clust(oisst_dat)

Broomstick Plot

# broken stick model
vegan::bstick(sst_clust$cl, plot=T)

Dendrogram

plot(sst_clust$cl, 
     hang = -0.1, 
     axes = FALSE, 
     cex = 0.8, 
     horiz = F)
axis(side = 2, cex.axis = 1.3)
title("SST Anomalies - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)

Anomalies with Breakpoints

# Re-plot patterns with breaks
sst_patterns +
  geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 1998.5), color = "gray50", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2002.5), color = "gray50", linetype = 2, size = 0.7) +
  geom_vline(aes(xintercept = 2005.5), color = "gray50", linetype = 2, size = 0.7) 

Biomass Patterns with Breakpoints

This section seeks to track different aspects of the biomass distribution during each of the breaks determined from the break point analyses.

Primarily 1. How is biomass changing for the different functional groups
2. What is the relative balance of biomass across functional groups, and within them, size bins

# Make groups based on breaks
edwards_breaks <- list("1970-1987" = 1970:1987,
                    "1988-2008" = 1988:2008,
                    "2009-2019" = 2009:2019)
temp_breaks    <- list("1981-1998" = 1981:1998, 
                    "1999-2002" = 1999:2002, 
                    "2003-2005" = 2003:2005,
                    "2006-2011" = 2006:2011, 
                    "2012-2021" = 2012:2021)

## label the edwards bins
nefsc_size_bins <- imap_dfr(edwards_breaks, function(x,y){
    nefsc_size_bins %>% 
      filter(Year %in% x) %>% 
      mutate(e_break = y,
             e_break = factor(e_break, levels = names(edwards_breaks)))
}) %>% arrange(Year)

# label the temp bins
nefsc_size_bins <- imap_dfr(temp_breaks, function(x,y){
    nefsc_size_bins %>% 
      filter(Year %in% x) %>% 
      mutate(t_break = y,
             t_break = factor(t_break, levels = names(temp_breaks)))
}) %>% arrange(Year)
# Track how things are changing across slope exponent breaks
functional_group_splits <- nefsc_size_bins %>% 
  split(.$e_break) %>% 
  imap(function(e_data, label){
    
    # 1. How is biomass trending within each functional group
    
    # Get that yearly summary
    yearly_data <- e_data %>% 
      group_by(Year, spec_class) %>% 
      summarise(
        total_strat_biom = sum(expanded_lwbio_s), 
        avg_ind_mass = weighted.mean(ind_weight_kg, Number),
        avg_ind_len = weighted.mean(LngtClass, Number),
        .groups = "drop")
    
    # run linear models
    biom_trend_lm   <- broom::tidy(lm(total_strat_biom ~ Year, 
                                      data = yearly_data))
    length_trend_lm <- broom::tidy(lm(avg_ind_len ~ Year, 
                                      data = yearly_data))
    imass_trend_lm <- broom::tidy(lm(avg_ind_mass ~ Year, 
                                      data = yearly_data))
    
    # report direction
    biom_slope     <- biom_trend_lm[[2,"estimate"]]
    len_slope      <- length_trend_lm[[2,"estimate"]]
    imass_slope      <- imass_trend_lm[[2,"estimate"]]
    biom_direction <- ifelse(biom_slope > 0, "increase", "decrease")
    len_direction  <- ifelse(len_slope > 0, "increase", "decrease")
    imass_direction  <- ifelse(imass_slope > 0, "increase", "decrease")
    
    # report significance
    biom_sig <- ifelse(biom_trend_lm[[2,"p.value"]] < 0.05, "significant", "not")
    len_sig  <- ifelse(length_trend_lm[[2,"p.value"]] < 0.05, "significant", "not")
    imass_sig  <- ifelse(imass_trend_lm[[2,"p.value"]] < 0.05, "significant", "not")
    
    # make table
    trend_table <- data.frame("ebreak"       = rep(label, 3),
                              "data_source"  = c("stratified_biomass", "avg_length", "avg_mass"),
                              "trend"        = c(biom_direction, len_direction, imass_direction),
                              "rate"         = c(biom_slope, len_slope, imass_slope),
                              "significance" = c(biom_sig, len_sig, imass_sig))
    
    # 2. Relative Biomass
    total_biom <- sum(e_data$expanded_lwbio_s)
    relative_biom <- e_data %>% 
      group_by(spec_class) %>% 
      summarise(n_years = n_distinct(Year),
                group_biom_total = sum(expanded_lwbio_s),
                avg_ann_biom = group_biom_total / n_years,
                biom_frac = group_biom_total / total_biom,
                avg_ind_mass = weighted.mean(ind_weight_kg, Number),
                avg_ind_len = weighted.mean(LngtClass, Number),
                .groups = "drop") %>% 
      mutate(ebreak = label)
    
    # return tables
    return(list(#"biom_lm" = biom_trend_lm,
                #"len_lm" = length_trend_lm,
                "trends" = trend_table, 
                "rel_bio" = relative_biom))
    
  })


# Pull out trends as a table
fgroup_trends <- map_dfr(functional_group_splits, ~ .x[["trends"]])


# Pull out relative biomass as a table
fgroup_rel_bio <- map_dfr(functional_group_splits, ~ .x[["rel_bio"]])

Relative Biomass

The following figure tracks the relative distribution of catch across the different functional groups.

fgroup_rel_bio %>% 
  ggplot(aes(ebreak, y = biom_frac, fill = spec_class)) +
  geom_col(position = "dodge") +
  scale_fill_gmri() +
  labs(x = "Stanzas from ISD Exponent Breakpoint Analysis", 
       y = "Relative Proportion of Stratified Biomass",
       fill = "")

Average Annual Biomass

This figure visualizes the avarage total biomass (per year) for each functional group within the different size spectrum stanzas.

fgroup_rel_bio %>% 
  ggplot(aes(ebreak, y = avg_ann_biom/1e6, fill = spec_class)) +
  geom_col(position = "dodge") +
  scale_fill_gmri() +
  labs(x = "Stanzas from ISD Exponent Breakpoint Analysis", 
       y = "Avg. Annual Biomass (million kg)",
       fill = "")

Average Individual Mass

This figure visualizes the average individual bodymass for each functional group within the different size spectrum stanzas.

fgroup_rel_bio %>% 
  ggplot(aes(ebreak, y = avg_ind_mass, fill = spec_class)) +
  geom_col(position = "dodge") +
  scale_fill_gmri() +
  labs(x = "Stanzas from ISD Exponent Breakpoint Analysis", 
       y = "Average Individual Bodymass (kg)",
       fill = "")

Average Individual Length

This figure does the same, but for individual length, not bodymass.

fgroup_rel_bio %>% 
  ggplot(aes(ebreak, y = avg_ind_len, fill = spec_class)) +
  geom_col(position = "dodge") +
  scale_fill_gmri() +
  labs(x = "Stanzas from ISD Exponent Breakpoint Analysis", 
       y = "Average Individual Length (cm)",
       fill = "")

Slope Check

After seeing no large difference in the things above that we expected to see, I plotted the distribution of size spectrum slopes for each stanza. Lot of within gorup variation, not much across group variation.

warmem_group_slopes <- imap_dfr(edwards_breaks, function(x,y){
    warmem_group_slopes %>% 
      filter(Year %in% x) %>% 
      mutate(e_break = y,
             e_break = factor(e_break, levels = names(edwards_breaks)))
}) %>% arrange(Year)


warmem_group_slopes %>% 
  ggplot(aes(x = e_break, y = b, color = e_break)) +
  scale_color_gmri() +
  geom_jitter(width = 0.1) +
  labs(x = "Stanzas from ISD Exponent Breakpoint Analysis", 
       y = "Individual Size Distribution Exponent",
       color = "")

 

A work by Adam A. Kemberling

Akemberling@gmri.org